"""
Traverse all the repositories recursively to find repos containing bazel files.
Writes a CSV file with the list of repos.
"""
import os
from glob import glob
from glob import iglob
import pandas as pd
# Absolute directory of this script; inputs and outputs are resolved against it.
project_dir = os.path.dirname(os.path.abspath(__file__))
# Sub-directory of project_dir holding checkouts laid out as
# <username>/<repository>/...
repository_dir = "clonned_repos"
# Sub-directory of project_dir that receives the CSV report.
output_dir = "results"
columns = ["username", "repository", "pattern", "path"]
# File names whose presence marks a repository as using bazel.
patterns = ["BUILD.bazel"]


def search_glob(root, pattern):
    """Return the glob that finds *pattern* at any depth of every repository.

    Repositories live at ``<root>/<username>/<repository>``.  The ``**``
    component only recurses when the glob is evaluated with
    ``recursive=True``; it also matches zero directories, so files sitting
    directly in the repository root are still found.
    """
    return os.path.join(root, "*", "*", "**", pattern)


def split_owner_repo(path, root):
    """Return ``(username, repository)`` for a match *path* found under *root*.

    The path is taken relative to *root* so that directory names above the
    checkout area can never be mistaken for the owner or the repository.
    """
    parts = os.path.relpath(path, root).split(os.sep)
    return parts[0], parts[1]


def is_vendored(path, root):
    """Return True when *path* lies in a vendor directory inside its repository.

    Only the directories below ``<username>/<repository>`` are inspected, so
    an owner, repository or checkout location whose name merely contains
    "vendor" does not exclude the repository.
    """
    parts = os.path.relpath(path, root).split(os.sep)
    # parts[0:2] are owner/repository and parts[-1] is the matched file name.
    return any("vendor" in part for part in parts[2:-1])


data = []              # every match, vendored or not
vendor_less_data = []  # matches located outside vendor directories
uniq = set()           # (username, repository) pairs already reported
uniq_data = []         # first non-vendored match of each repository

repos_root = os.path.join(project_dir, repository_dir)
for pattern in patterns:
    file_path = search_glob(repos_root, pattern)
    print(file_path)
    for i, f in enumerate(iglob(file_path, recursive=True), 1):
        username, repo = split_owner_repo(f, repos_root)
        info = {
            "username": username,
            "repository": repo,
            "pattern": pattern,
            "path": f,
        }
        print(username, repo)
        print(i, f)
        data.append(info)
        if is_vendored(f, repos_root):
            continue
        vendor_less_data.append(info)
        # A tuple key keeps ("ab", "c") and ("a", "bc") distinct, which plain
        # string concatenation would not.
        if (username, repo) not in uniq:
            uniq.add((username, repo))
            uniq_data.append(info)

# Only the one-row-per-repository view is written; `data` and
# `vendor_less_data` are collected but not exported.
if uniq_data:
    report_dir = os.path.join(project_dir, output_dir)
    # to_csv does not create missing parent directories.
    os.makedirs(report_dir, exist_ok=True)
    pd.DataFrame(columns=columns, data=uniq_data).to_csv(
        os.path.join(report_dir, "current.csv"), index=False
    )
